## As noted in the paper, not all variables used as "controls" were available for 2014
## at the time of writing. Results with the proper variables might change, though probably
## not mutch.

rm(list=ls(all=TRUE))
library(car)
library(xtable)
library(plyr)
library(reshape)
library(sem)
library(mvtnorm)

##### Declare some functions that make life easier ######
##### These were written for the AJPS paper 	   ######

plot.propensity <- function(x){
    plot(predict(x), residuals(x)/sd(residuals(x)), pch=".", main="Predicting Treatment",xlab="Fitted",ylab="St. Residuals")
    abline(h=0,lwd=0.8)
    abline(h=c(-2,2),lty=2)
    lines(lowess(predict(x), residuals(x)/sd(residuals(x))), xlab="", ylab="", lwd=1.5,col=2)
    if(class(x)=="lm"){text(max(predict(x)),min(residuals(x)/sd(residuals(x))),labels=paste("R2=",round(summary(x)$r.s,2)),pos=2)
                                yvar=x$model[,1]}
    if(class(x)=="survreg"){yvar=  x$y[,1]}
    }

log.transform <- function(x,off=0.1){
    if(sign(min(x,na.rm=T))!=1){x <- x + abs(min(x,na.rm=T)) + off} #move min to 1
    x <- log(x)
    return(x)
}


my.ps <- function(x,#x is the data, 
			reg, #reg is the ps.regression,
			S=10, #strata
			xvar, #dependent variable (vote share)
			valid, #valid votes,
			ctrls=T, #use controls within strata
			quant=T #use quantile or absolute strata
			){ 
    tm <- proc.time()
    x$ps <-  predict(reg) 
    treatment <- names(reg$model)[1]
    cat("\nEstimating impact of",treatment,"on",xvar,"\n")
    cat(sum(x$ps<0),"municipalities with predicted treatment lower than zero (coerced to 0)...\n");flush.console()
    x$ps[x$ps<0] <- 0 #truncate ps at zero   
    x$ps.strata <- if(quant==T){findInterval(x$ps,quantile(x$ps,prob=(1:(S-1))/S,na.rm=TRUE))+1 ### by quantiles
                  }else{ if(length(grep("alcance",treatment))==1){
                  					findInterval(x$ps,seq(0.1,.8,0.1))+1 ### break alcance fixed
                  }else{ if(length(grep("2002",treatment))==1){ #for 2002 use different brackets
                  					findInterval(x$ps,c(seq(0.1,0.5,0.1),seq(0.75,2.5,0.5)))+1
                  }else{findInterval(x$ps,c(seq(0.25,2,0.25)))+1 }}}### fixed intervals
                  
                  
    # CREATE STRATA OUTPUT OBJECT  
    all.strata <- data.frame(size=as.numeric(prop.table(table(x$ps.strata))),
                            PSmin=as.numeric(by(x$ps,x$ps.strata,min)),
                            PSmean=as.numeric(by(x$ps,x$ps.strata,mean)),
                            PSmax=as.numeric(by(x$ps,x$ps.strata,max)))#matrix to save results
    all.strata<- cbind(all.strata,matrix(unlist( by(x[,treatment],x$ps.strata,function(x) quantile(x, probs=c(0,0.5,1)))),ncol=3,byrow=T,dimnames=list(NULL,c("tmin","tmed","tmax"))))
    # DECLARE THE REGRESSION FORMULA
	x$growth <- log.transform(x[,paste("dgdpH.1.",gsub("\\D","",treatment),sep="")])
	controls <- if(ctrls==T){"+ps+growth"}else{"+ps"}

    reg.formula <- as.formula(paste(xvar,"~",treatment,controls)) #regression forumula!!!!!    
    #OLS
    tmp<- by(x, x$ps.strata, function(y) lm(reg.formula,data=y)) #regressions within strata
    tmp.out <- cbind(lmest=sapply(tmp, function(y) coef(y)[treatment]),
                     lmvar=sapply(tmp, function(y) diag(vcov(y))[treatment]))
    all.strata <- cbind(all.strata,tmp.out)
    #Counter factual based on the OLS estimates
    notreatment.data <-  sapply(tmp, function(y){out<-data.frame(y$model[,-which(names(y$model)==treatment)],tmp=0);
                                               names(out)[ncol(out)]<-treatment;return(out)}, simplify=F) #creates the counterfactual 
    options(warn = -1)
    pred0 <- mapply(predict,tmp,notreatment.data,interval="prediction")  
    pred  <-sapply(tmp,predict,interval="prediction")
    options(warn = 0)
    tmp <- cbind(pred.vs=do.call("rbind",pred),pred0.vs=do.call("rbind",pred0))
    rownames(tmp) <- gsub("^\\d\\.","",rownames(tmp))
    tmp <- tmp[rownames(x),]
    colnames(tmp) <- c("pred","lwr.pred","upr.pred","pred0","lwr.pred0","upr.pred0")
    x <- cbind(x,tmp)
    x$pred.swing <- round(x$pred*x[,valid])-(round(x$pred0*x[,valid]))
    x$pred.swing.upr <- round(x$upr.pred*x[,valid])-(round(x$upr.pred0*x[,valid]))
    x$pred.swing.lwr <- round(x$lwr.pred*x[,valid])-(round(x$lwr.pred0*x[,valid]))
    cct.votes <- paste("between",sum(x$pred.swing.lwr),"and",sum(x$pred.swing.upr))
    swing <- c(pe=sum(x$pred.swing),lwr=sum(x$pred.swing.lwr),upr=sum(x$pred.swing.upr))

     #Wrap up
     row.names(all.strata)<-1:nrow(all.strata)
     ate <- t(all.strata$size) %*% all.strata$lmest #weighted average of effect
     ate.se <- sqrt(sum(all.strata$lmvar*all.strata$size^2))
     all.ate <- matrix(c(ate,ate.se),nrow=2,ncol=1,dimnames=list(c("est","se"),c("total")))
     print(round(all.strata,3))
     cat("Average treatment effect(se)= ",ate,"(",ate.se,") p-value=",round(pnorm(ate/ate.se,lower=F)*2,3),"\n",sep = "")
     cat("Incumbent would have received",cct.votes,"less votes without CCT\n")
     cat("Time elapsed:",round((proc.time()-tm)[3],2),"seconds\n")
     return(list(datawithps=x,ate=all.ate,all.strata=all.strata,cct.votes=swing ))
}
     

plot.effects <- function(xx,xr=T,yr=T,bands=T,saveplot=NULL,folder=getwd()){ #plots effects
    cih <- xx$lmest + sqrt(xx$lmvar) * qnorm(0.975)
    cil <- xx$lmest - sqrt(xx$lmvar) * qnorm(0.975)
    if(is.logical(xr)){xlims <- c(0,1)}else{xlims <- xr}
    if(is.logical(yr)){ylims <- rep(max(abs(range(c(cih,cil)))),2)*c(-1,1)}else{ylims <- yr}
    if(is.null(saveplot)==F){pdf(file=paste(folder,"/fig-effects",
    									ifelse(the.quant==T,"Quantile","Abolute"),
    									saveplot,if(bands==T){"bands"},".pdf",
    									sep=""))}
    par(mar=c(4.5,4.5,1,1))
    plot(xlims,ylims,type="n",xlab="",ylab="",bty="n")
    polygon(x=c(xlims[1],rep(xx$PSmax[nrow(xx)],2),xlims[1]),y= c(rep(ylims[1],2),rep(ylims[2],2)),col=gray(0.75),border=NA)
    abline(v=c(0,xx$PSmax),lty=3,col=gray(1),lwd=2)
    abline(h=0,col=gray(0.5),lty=2)
    axis(side=1)
    if(bands==T){
    	lower.bound <- c(0,xx$PSmax)  #lower bound is upper bound in previous block
    	for(ii in 1:nrow(xx)){
    	polygon(x=c(lower.bound[ii],xx$PSmax[ii],xx$PSmax[ii],lower.bound[ii]),
            y=c(rep(cih[ii],2),rep(cil[ii],2)),col=gray(0.55),border=NA)
        lines(x=c(lower.bound[ii],xx$PSmax[ii]),y=rep(xx$lmest[ii],2),
        	lwd=1.3,col=gray(1))} #horizontal point estimates
    }else{
    	for(ii in 1:nrow(xx)){lines(x=rep(xx$PSmean[ii],2),y=c(cih[ii],cil[ii]),lwd=2)}
    	points(xx$PSmean,xx$lmest,cex=1.7,pch=21,bg=1) #point estimates
    }
    mtext(side=1,line=2.5,"Predicted Treatment",cex=1.3)
    mtext(side=2,line=2.5,"CCT Effect",cex=1.3)
    if(is.null(saveplot)==F){dev.off()}
}


#### Load the data #########
setwd("~/Dropbox/Data/Paper-BF-2014")  #set to the folder where data are!
load('~/Dropbox/Data/Paper-BF-2014/data_replicGPS2014.RData')
cat(comment(dd),"\n")
the.data <- na.omit(the.data)
cat(nrow(dd)-nrow(the.data),"municipalities dropped due to missing data\n")

######################################################################################
####																			  ####
#### Preliminary definitions												  	  ####
####																			  ####
######################################################################################
the.S<-10; 					#number of strata (only if using quantile strata)
the.quant<- F 				#use quantile strata (T) or absolute strata (F)
the.ctrl <- T				#use controls (growth and state fe) within strata regressions
ate.matrix <-  matrix(NA,nrow=2,ncol=2,
				dimnames=list(c("spend.2014",
								"scope.2014"),
							  c("ate","se")))
cctvotes.matrix <-   matrix(NA,nrow=2,ncol=3,
				dimnames=list(c( "spend.2014",
								 "scope.2014")
                           ,c("votes","lwr","upr")))
######################################################################################
####																			  ####
#### SPENDING,  BOTH ROUNDS  													  ####
####																			  ####
######################################################################################

prop.spend.2014 <- lm(spentH.2014.cct~hdi.2010+lula.vs.1998
							+I(hdi.2010*hdi.2010)+I(hdi.2010*hdi.2010*hdi.2010)
							+pent.2010+nonwhite.2010+distcap
							+I(log(gdpH.2014))+log.transform(gdpH.2014)+
                                +I(mayorpty.2012=="PSDB")+I(mayorpty.2012=="PT")
                                +I(incgov.2014=="PT")#+I(incgov.2014=="PSDB") collinear
                                +as.factor(state)
                               ,data=the.data)
plot.propensity(prop.spend.2014)

tmp.spend.2014<- my.ps(the.data,prop.spend.2014,S=the.S,
					xvar="dilma.vs.2014",quant=the.quant,
					valid="valid.2014",ctrl=the.ctrl)
plot.effects(tmp.spend.2014$all.strata,xr=c(0,2.5),bands=T)

ate.matrix[1,] <- tmp.spend.2014[[2]][,"total"] #save ate 
cctvotes.matrix[1,] <- tmp.spend.2014[[4]]


######################################################################################
####																			  ####
#### 	SCOPE, ALL YEARS, BOTH ROUNDS  (rounds in same year use same ps)	  	  ####
####																			  ####
######################################################################################

prop.alcance.2014 <- lm(alcance.2014~targetalcance.2014+lula.vs.1998+
                               +hdi.2010+I(hdi.2010*hdi.2010)
                               +I(hdi.2010*hdi.2010*hdi.2010)
                               +pent.2010+nonwhite.2010+distcap
                               +I(log(gdpH.2014))
                                +I(mayorpty.2012=="PSDB")+I(mayorpty.2012=="PT")
                                +I(incgov.2014=="PT")#+I(incgov.2014=="PSDB")#collinear
                                +as.factor(state)
                                ,data=the.data) 
plot.propensity(prop.alcance.2014)

tmp.alcance.2014<- my.ps(the.data,prop.alcance.2014,S=the.S,
					xvar="dilma.vs.2014",quant=the.quant,
					valid="valid.2014",ctrl=the.ctrl)
plot.effects(tmp.alcance.2014$all.strata,yr=c(-.8,.8))#to print to screen
ate.matrix[2,] <- tmp.alcance.2014[[2]][,"total"] #save ate 
cctvotes.matrix[2,] <- tmp.alcance.2014[[4]]


######################################################################################
####																			  ####
#### Basic OLS Regressions													  	  ####
#### (log transform growth to get rid of outliers)								  ####
######################################################################################
reg.2014 <- lm(dilma.vs.2014~ spentH.2014.cct+hdi.2010+log(pop.2011)+lula.vs.1998+
				distcap+
                nonwhite.2010+
                log.transform(dgdpH.1.2014)+
                +I(mayorpty.2012=="PSDB")+I(mayorpty.2012=="PT")
                 +I(incgov.2014=="PT")#+I(incgov.2014=="PSDB")#collinear
                 +as.factor(state)                +as.factor(state)
                , data=dd)
reg.alcance.2014 <- lm(dilma.vs.2014~alcance.2014+hdi.2010+log(pop.2011)+lula.vs.1998+
				distcap+
                nonwhite.2010+
                targetalcance.2014+
                log.transform(dgdpH.1.2014)+
                 +I(mayorpty.2012=="PSDB")+I(mayorpty.2012=="PT")
                 +I(incgov.2014=="PT")#+I(incgov.2014=="PSDB")#collinear
                 +as.factor(state)
                 , data=dd)


